#########################################################################################
# Title: Table1.R
# Author: Yuki Atsusaka (atsusaka@rice.edu)
# Aim: Code to replicate Table 1 in Atsusaka (2021)
#########################################################################################

# CLEAN THE GLOBAL ENVIRONMENT AND READ PACKAGES
# rm(list=ls()) removes every non-hidden object from the workspace. It does
# not detach packages or reset options, so start from a fresh R session when
# a fully clean state is needed.
rm(list=ls())
library(xtable)     # xtable(): renders the results data frame as LaTeX code
library(tidyverse)  # read_csv(), filter() and %>% are used below

##############################################################
# COMPUTE ePCPs
##############################################################

# Read the full data set; here::here() resolves the path to the file.
dt <- "Data_LAMayoral.csv" %>%
  here::here() %>%
  read_csv()
# Return TRUE for each element of x that leaves no remainder when divided
# by 2, FALSE otherwise (NA stays NA).
is.even <- function(x) {
  remainder <- x %% 2
  remainder == 0
}

# Subsets of the full data, one per row of the table. Each subset keeps all
# columns of dt and only restricts the rows.
dt1 <- dt                                          # All observations
dt2 <- dt %>% filter(C < 40)                       # Majority-white
dt3 <- dt %>% filter(C >= 40 & C < 65)             # Racially balanced
dt4 <- dt %>% filter(C >= 65)                      # Majority-minority
dt5 <- dt %>% filter(city_type == "Urban")         # Urban
dt6 <- dt %>% filter(city_type == "Suburban")      # Suburban
dt7 <- dt %>% filter(city_type == "Rural")         # Rural
dt8 <- dt %>% filter(incumb_ran == 0)              # Open races
dt9 <- dt %>% filter(incumb_ran == 1)              # Non-open races
dt10 <- dt %>% filter(unopposed == 1)              # Unopposed (uncontested) elections
dt11 <- dt %>% filter(unopposed == 0)              # Contested elections
# is.even() already returns a logical vector, so it is used directly rather
# than compared against T/F (which are ordinary, reassignable variables).
dt12 <- dt %>% filter(is.even(year))               # On-cycle (even years)
dt13 <- dt %>% filter(!is.even(year))              # Off-cycle (odd years)
dt14 <- dt %>% filter(city_council == "AtLarge")   # At-large city councils
dt15 <- dt %>% filter(city_council == "District")  # District city councils
dt16 <- dt %>% filter(city_council == "Mixed")     # Mixed city councils
dt17 <- dt %>% filter(year < 1994)                 # Before Southern Republican
dt18 <- dt %>% filter(year >= 1994)                # After Southern Republican

# The order of this list must match the order of the labels in namvec,
# because the two are paired by position when the table rows are built.
dt_list <- list(
  dt1, dt2, dt3, dt4, dt5,
  dt6, dt7, dt8, dt9, dt10,
  dt11, dt12, dt13, dt14, dt15,
  dt16, dt17, dt18
)

# Row labels for the table, paired by position with the elements of dt_list.
namvec <- c(
  "All Districts",
  # Bands of C
  "0<C<40", "40<C<65", "65<C<100",
  # City type
  "Urban", "Suburban", "Rural",
  # Incumbency
  "Open-Seat", "Not Open-Seat",
  # Competition
  "Uncontested Elections", "Contested Elections",
  # Election timing
  "On-Cycle", "Off-Cycle",
  # City council structure
  "At-Large (Councils)", "District (Councils)", "Mixed (Councils)",
  # Period
  "Before 1994", "After 1994"
)

# Expected percent correctly predicted (ePCP), returned as a proportion.
#   p: predicted probabilities of the outcome, one per observation
#   y: observed outcomes coded 0/1, in the same order as p
# Observations with y == 1 contribute p, those with y == 0 contribute 1 - p,
# and the total is divided by the number of observations.
compute_epcp <- function(p, y) {
  (sum(p[y == 1]) + sum(1 - p[y == 0])) / length(y)
}

# Build one row of the in-sample table for a single subset of the data.
#   dat:   data frame with columns M (racial margin of victory),
#          C (% Black voters), run and win (0/1 outcomes)
#   label: display name of the subset
# Returns a one-row data frame with the label (followed by the number of
# rows in parentheses) and six ePCP values in percent, rounded to one
# decimal: logical model, LPM and logit for `run`, then the same for `win`.
# NOTE(review): predict() returns one value per row used in the fit, so the
# indexing below assumes M, C, run and win contain no missing values --
# confirm against the data.
compute_insample_row <- function(dat, label) {
  n_obs <- nrow(dat)
  true_run <- dat$run  # Outcome I  (Minority Candidate Emergence)
  true_win <- dat$win  # Outcome II (Minority Candidate Victory)

  ############################
  # LOGICAL MODEL
  ############################
  # Geometric mean of M and C, shifted by 50, passed through the standard
  # normal CDF. The same probabilities are scored against both outcomes.
  q_model <- sqrt(dat$M * dat$C) - 50
  p_model <- pnorm(q = q_model, mean = 0, sd = 1)

  ############################
  # LINEAR PROBABILITY MODELS
  ############################
  # Fitted values of lm() are used as probabilities as they are; they are
  # not clipped to [0, 1].
  p_lpm_run <- predict(lm(run ~ M + C, data = dat))
  p_lpm_win <- predict(lm(win ~ M + C, data = dat))

  ############################
  # LOGISTIC REGRESSIONS
  ############################
  p_logit_run <- predict(glm(run ~ M + C, data = dat, family = binomial),
                         type = "response")
  p_logit_win <- predict(glm(win ~ M + C, data = dat, family = binomial),
                         type = "response")

  # ePCP in percent, rounded to one decimal place.
  as_pct <- function(p, y) round(compute_epcp(p, y) * 100, digits = 1)

  # The percentages stay numeric (they are not pasted into a character
  # vector together with the label) so that the digits setting passed to
  # xtable() later actually applies to them.
  data.frame(
    Subset = paste0(label, " (", n_obs, ")"),
    Logical = as_pct(p_model, true_run),
    LMP = as_pct(p_lpm_run, true_run),
    Logit = as_pct(p_logit_run, true_run),
    LogicalWin = as_pct(p_model, true_win),
    LMPWin = as_pct(p_lpm_win, true_win),
    LogitWin = as_pct(p_logit_win, true_win),
    stringsAsFactors = FALSE
  )
}

# One row per subset, built in a list and bound once instead of growing a
# data frame with rbind() inside a loop. Subsets and labels are paired by
# position, so their lengths must agree.
stopifnot(length(dt_list) == length(namvec))
insample_rows <- Map(compute_insample_row, dt_list, namvec)
df_insample <- do.call(rbind, insample_rows)

#####################################
# WRITE OUT A TABLE IN LATEX CODE
#####################################
# Column headers: the subset label, then the three models for each of the
# two outcomes (the model names are intentionally repeated).
names(df_insample) <- c("Subset", rep(c("Logical", "LMP", "Logit"), times = 2))
rownames(df_insample) <- NULL
print(df_insample)

# Emit the LaTeX table without row names. The digits vector has one entry
# for the row names followed by one entry per column.
print(
  xtable(df_insample, digits = c(0, rep(1, times = 7))),
  include.rownames = FALSE
)


#########################################################################################
# END OF THIS R SOURCE FILE
#########################################################################################